After the virtual screening with each QSAR model, let’s concatenate all the outputs to analyze the molecules and rank them according to different scores.
# Label tables for the two- and three-variant sets. Each row receives a
# running "Index" (its row number), which is the key the virtual-screening
# outputs are joined on further down.
# The index length is taken from the data instead of a hard-coded row count, so
# the block keeps working when the CSV files change size.
indices_2 <- read.csv("indice_2.csv", stringsAsFactors = FALSE, header = TRUE, sep = ",")
indices_2 <- cbind(indices_2, seq_len(nrow(indices_2)))
colnames(indices_2) <- c("Label", "Index")
indices_3 <- read.csv("indice_3.csv", stringsAsFactors = FALSE, header = TRUE, sep = ",")
indices_3 <- cbind(indices_3, seq_len(nrow(indices_3)))
colnames(indices_3) <- c("Label", "Index")
## LASSO VS
library(knitr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the virtual-screening result file of every model and attach the
# molecule labels through the shared "Index" column. The "_2" objects use the
# two-variant label table, the "_3" objects the three-variant one.
LASSO_2 <- left_join(read.csv("VS_LASSO-2.csv", stringsAsFactors = FALSE),
                     indices_2, by = "Index")
LASSO_3 <- left_join(read.csv("VS_LASSO-3.csv", stringsAsFactors = FALSE),
                     indices_3, by = "Index")
## LASSO-MLR VS
L_MLR_2 <- left_join(read.csv("VS_MLRLASSO-2.csv", stringsAsFactors = FALSE),
                     indices_2, by = "Index")
L_MLR_3 <- left_join(read.csv("VS_MLRLASSO-3.csv", stringsAsFactors = FALSE),
                     indices_3, by = "Index")
## GA-MLR VS
GA_MLR_2 <- left_join(read.csv("VS_GAMLR-2.csv", stringsAsFactors = FALSE),
                      indices_2, by = "Index")
GA_MLR_3 <- left_join(read.csv("VS_GAMLR-3.csv", stringsAsFactors = FALSE),
                      indices_3, by = "Index")
## PLS VS
PLS_2 <- left_join(read.csv("VS_PLS-2.csv", stringsAsFactors = FALSE),
                   indices_2, by = "Index")
PLS_3 <- left_join(read.csv("VS_PLS-3.csv", stringsAsFactors = FALSE),
                   indices_3, by = "Index")
## RSM VS
RSM_2 <- left_join(read.csv("VS_RSMMLR-2.csv", stringsAsFactors = FALSE),
                   indices_2, by = "Index")
RSM_3 <- left_join(read.csv("VS_RSMMLR-3.csv", stringsAsFactors = FALSE),
                   indices_3, by = "Index")
## Concatenating datasets
set.seed(10)
# One column per model: "Moleculas_*" holds the index columns of the five
# result tables side by side, "Nombres_*" the matching label columns.
# Column names are given directly in cbind(), so no renaming step is needed.
Moleculas_2 <- as.data.frame(cbind(
  "LASSO" = LASSO_2$Index,
  "LASSO-MLR" = L_MLR_2$Index,
  "GA-MLR" = GA_MLR_2$Index,
  "PLS" = PLS_2$Index,
  "RSM" = RSM_2$Index
))
Nombres_2 <- as.data.frame(cbind(
  "LASSO" = LASSO_2$Label,
  "LASSO-MLR" = L_MLR_2$Label,
  "GA-MLR" = GA_MLR_2$Label,
  "PLS" = PLS_2$Label,
  "RSM" = RSM_2$Label
))
Moleculas_3 <- as.data.frame(cbind(
  "LASSO" = LASSO_3$Index,
  "LASSO-MLR" = L_MLR_3$Index,
  "GA-MLR" = GA_MLR_3$Index,
  "PLS" = PLS_3$Index,
  "RSM" = RSM_3$Index
))
Nombres_3 <- as.data.frame(cbind(
  "LASSO" = LASSO_3$Label,
  "LASSO-MLR" = L_MLR_3$Label,
  "GA-MLR" = GA_MLR_3$Label,
  "PLS" = PLS_3$Label,
  "RSM" = RSM_3$Label
))
## Two variants
# Tally how often each molecule index occurs within the first 20 rows of the
# five model columns.
Frecuencia_2 <- setNames(
  as.data.frame(table(unlist(Moleculas_2[1:20, ]))),
  c("Molecule", "Occurrence frequency 2")
)
# The tallied values come back as a factor; map the levels back to numbers.
Frecuencia_2$Molecule <- as.numeric(levels(Frecuencia_2$Molecule))[Frecuencia_2$Molecule]
## Three variants
Frecuencia_3 <- setNames(
  as.data.frame(table(unlist(Moleculas_3[1:20, ]))),
  c("Molecule", "Occurrence frequency 3")
)
Frecuencia_3$Molecule <- as.numeric(levels(Frecuencia_3$Molecule))[Frecuencia_3$Molecule]
## Matching each frequency with initial index
# For every tallied molecule, find the row at which it appears in each
# model's column, then show the first 20 rows of the resulting table.
I_INDEX_2 <- match(Frecuencia_2$Molecule, Moleculas_2[["LASSO"]])
II_INDEX_2 <- match(Frecuencia_2$Molecule, Moleculas_2[["LASSO-MLR"]])
III_INDEX_2 <- match(Frecuencia_2$Molecule, Moleculas_2[["GA-MLR"]])
IV_INDEX_2 <- match(Frecuencia_2$Molecule, Moleculas_2[["PLS"]])
V_INDEX_2 <- match(Frecuencia_2$Molecule, Moleculas_2[["RSM"]])
Frecuencia_2_final <- as.data.frame(cbind(
  Molecule = Frecuencia_2$Molecule,
  LASSO = I_INDEX_2,
  LASSO_MLR = II_INDEX_2,
  GA_MLR = III_INDEX_2,
  PLS = IV_INDEX_2,
  RSM = V_INDEX_2
))
kable(Frecuencia_2_final[seq_len(20), ], align = "cccccc")
| Molecule | LASSO | LASSO_MLR | GA_MLR | PLS | RSM |
|---|---|---|---|---|---|
| 1 | 21 | 21 | 11 | 22 | 192 |
| 2 | 22 | 22 | 12 | 27 | 193 |
| 5 | 25 | 25 | 1 | 118 | 1 |
| 6 | 26 | 26 | 2 | 135 | 2 |
| 7 | 11 | 11 | 143 | 8 | 64 |
| 8 | 12 | 12 | 144 | 3 | 65 |
| 11 | 29 | 29 | 3 | 121 | 3 |
| 12 | 30 | 30 | 4 | 63 | 4 |
| 21 | 13 | 13 | 149 | 6 | 68 |
| 22 | 14 | 14 | 150 | 1 | 69 |
| 29 | 15 | 15 | 209 | 17 | 129 |
| 30 | 16 | 16 | 210 | 21 | 130 |
| 35 | 49 | 49 | 10 | 20 | 163 |
| 42 | 56 | 56 | 13 | 23 | 194 |
| 46 | 60 | 60 | 268 | 12 | 298 |
| 55 | 69 | 69 | 14 | 13 | 117 |
| 71 | 85 | 85 | 6 | 14 | 122 |
| 72 | 86 | 86 | 7 | 10 | 123 |
| 82 | 96 | 96 | 15 | 16 | 195 |
| 85 | 99 | 99 | 16 | 18 | 196 |
# For every tallied molecule of the three-variant set, find the row at which
# it appears in each model's column, then show the first 20 rows.
I_INDEX_3 <- match(Frecuencia_3$Molecule, Moleculas_3[["LASSO"]])
II_INDEX_3 <- match(Frecuencia_3$Molecule, Moleculas_3[["LASSO-MLR"]])
III_INDEX_3 <- match(Frecuencia_3$Molecule, Moleculas_3[["GA-MLR"]])
IV_INDEX_3 <- match(Frecuencia_3$Molecule, Moleculas_3[["PLS"]])
V_INDEX_3 <- match(Frecuencia_3$Molecule, Moleculas_3[["RSM"]])
Frecuencia_3_final <- as.data.frame(cbind(
  Molecule = Frecuencia_3$Molecule,
  LASSO = I_INDEX_3,
  LASSO_MLR = II_INDEX_3,
  GA_MLR = III_INDEX_3,
  PLS = IV_INDEX_3,
  RSM = V_INDEX_3
))
kable(Frecuencia_3_final[seq_len(20), ], align = "cccccc")
| Molecule | LASSO | LASSO_MLR | GA_MLR | PLS | RSM |
|---|---|---|---|---|---|
| 1 | 31 | 31 | 13 | 28 | 286 |
| 7 | 37 | 37 | 1 | 195 | 1 |
| 8 | 38 | 38 | 2 | 175 | 2 |
| 9 | 39 | 39 | 3 | 129 | 3 |
| 10 | 16 | 16 | 215 | 1 | 98 |
| 11 | 17 | 17 | 216 | 38 | 99 |
| 12 | 18 | 18 | 217 | 5 | 100 |
| 16 | 43 | 43 | 4 | 155 | 4 |
| 17 | 44 | 44 | 5 | 117 | 5 |
| 18 | 45 | 45 | 6 | 159 | 6 |
| 31 | 19 | 19 | 224 | 42 | 104 |
| 32 | 20 | 20 | 225 | 43 | 105 |
| 43 | 22 | 22 | 314 | 20 | 197 |
| 54 | 75 | 75 | 12 | 24 | 250 |
| 59 | 80 | 80 | 410 | 10 | 452 |
| 62 | 83 | 83 | 14 | 15 | 287 |
| 68 | 89 | 89 | 411 | 12 | 453 |
| 83 | 104 | 104 | 15 | 48 | 181 |
| 84 | 105 | 105 | 16 | 37 | 182 |
| 121 | 142 | 142 | 17 | 18 | 288 |
# Put the index columns and the label columns of the two-variant set in one
# table. Both inputs carry the same five column names; in the rendered table
# the label copies appear with a ".1" suffix (LASSO / LASSO.1, ...), and later
# code refers to them by those suffixed names.
Moleculas_2_Total <- cbind(Moleculas_2, Nombres_2)
# Interleave the columns: each model's index (1-5) followed by its label (6-10).
Moleculas_2_Total <- Moleculas_2_Total[, c(1, 6, 2, 7, 3, 8, 4, 9, 5, 10)]
kable(Moleculas_2_Total[1:20, ], align = "cccccccccc")
| LASSO | LASSO.1 | LASSO-MLR | LASSO-MLR.1 | GA-MLR | GA-MLR.1 | PLS | PLS.1 | RSM | RSM.1 |
|---|---|---|---|---|---|---|---|---|---|
| 115 | 446101 | 115 | 446101 | 5 | 118707521 | 22 | 54758613 | 5 | 118707521 |
| 116 | 446101 | 116 | 446101 | 6 | 118707521 | 125 | 440271 | 6 | 118707521 |
| 215 | ZINC000019893747 | 215 | ZINC000019893747 | 11 | 76325328 | 8 | 91820057 | 11 | 76325328 |
| 216 | ZINC000019893747 | 216 | ZINC000019893747 | 12 | 76325328 | 126 | 440271 | 12 | 76325328 |
| 275 | ZINC000257346903 | 275 | ZINC000257346903 | 246 | ZINC000101361998 | 127 | 440235 | 147 | 198515 |
| 276 | ZINC000257346903 | 276 | ZINC000257346903 | 71 | 12600646 | 21 | 54758613 | 148 | 198515 |
| 277 | ZINC000257347638 | 277 | ZINC000257347638 | 72 | 12600646 | 128 | 440235 | 149 | 198514 |
| 278 | ZINC000257347638 | 278 | ZINC000257347638 | 96 | 5284365 | 7 | 91820057 | 277 | ZINC000257347638 |
| 279 | ZINC000257347639 | 279 | ZINC000257347639 | 245 | ZINC000101361998 | 191 | 739 | 278 | ZINC000257347638 |
| 280 | ZINC000257347639 | 280 | ZINC000257347639 | 35 | 46174147 | 72 | 12600646 | 279 | ZINC000257347639 |
| 7 | 91820057 | 7 | 91820057 | 1 | 134695375 | 96 | 5284365 | 280 | ZINC000257347639 |
| 8 | 91820057 | 8 | 91820057 | 2 | 134695375 | 46 | 45266741 | 99 | 3052142 |
| 21 | 54758613 | 21 | 54758613 | 42 | 45357367 | 55 | 42612215 | 100 | 3052142 |
| 22 | 54758613 | 22 | 54758613 | 55 | 42612215 | 71 | 12600646 | 150 | 198514 |
| 29 | 51351654 | 29 | 51351654 | 82 | 7067772 | 86 | 6713972 | 219 | ZINC000032106887 |
| 30 | 51351654 | 30 | 51351654 | 85 | 6713972 | 82 | 7067772 | 220 | ZINC000032106887 |
| 125 | 440271 | 125 | 440271 | 86 | 6713972 | 29 | 51351654 | 221 | ZINC000032106889 |
| 126 | 440271 | 126 | 440271 | 162 | 112106 | 85 | 6713972 | 222 | ZINC000032106889 |
| 127 | 440235 | 127 | 440235 | 207 | ZINC000015205840 | 230 | ZINC000082114392 | 153 | 129618 |
| 128 | 440235 | 128 | 440235 | 208 | ZINC000015205840 | 35 | 46174147 | 154 | 129618 |
# Put the index columns and the label columns of the three-variant set in one
# table. Both inputs carry the same five column names; in the rendered table
# the label copies appear with a ".1" suffix (LASSO / LASSO.1, ...), and later
# code refers to them by those suffixed names.
Moleculas_3_Total <- cbind(Moleculas_3, Nombres_3)
# Interleave the columns: each model's index (1-5) followed by its label (6-10).
Moleculas_3_Total <- Moleculas_3_Total[, c(1, 6, 2, 7, 3, 8, 4, 9, 5, 10)]
kable(Moleculas_3_Total[1:20, ], align = "cccccccccc")
| LASSO | LASSO.1 | LASSO-MLR | LASSO-MLR.1 | GA-MLR | GA-MLR.1 | PLS | PLS.1 | RSM | RSM.1 |
|---|---|---|---|---|---|---|---|---|---|
| 172 | 446101 | 172 | 446101 | 7 | 118707521 | 10 | 91820057 | 7 | 118707521 |
| 173 | 446101 | 173 | 446101 | 8 | 118707521 | 187 | 440271 | 8 | 118707521 |
| 174 | 446101 | 174 | 446101 | 9 | 118707521 | 192 | 440235 | 9 | 118707521 |
| 322 | ZINC000019893747 | 322 | ZINC000019893747 | 16 | 76325328 | 190 | 440235 | 16 | 76325328 |
| 323 | ZINC000019893747 | 323 | ZINC000019893747 | 17 | 76325328 | 12 | 91820057 | 17 | 76325328 |
| 324 | ZINC000019893747 | 324 | ZINC000019893747 | 18 | 76325328 | 172 | 446101 | 18 | 76325328 |
| 412 | ZINC000257346903 | 412 | ZINC000257346903 | 367 | ZINC000101361998 | 189 | 440271 | 220 | 198515 |
| 413 | ZINC000257346903 | 413 | ZINC000257346903 | 369 | ZINC000101361998 | 191 | 440235 | 221 | 198515 |
| 414 | ZINC000257346903 | 414 | ZINC000257346903 | 143 | 5284365 | 362 | ZINC000101136391 | 222 | 198515 |
| 415 | ZINC000257347638 | 415 | ZINC000257347638 | 144 | 5284365 | 59 | 45933887 | 224 | 198514 |
| 416 | ZINC000257347638 | 416 | ZINC000257347638 | 368 | ZINC000101361998 | 144 | 5284365 | 225 | 198514 |
| 417 | ZINC000257347638 | 417 | ZINC000257347638 | 54 | 46174147 | 68 | 45266741 | 415 | ZINC000257347638 |
| 418 | ZINC000257347639 | 418 | ZINC000257347639 | 1 | 134695375 | 426 | ZINC000307565252 | 416 | ZINC000257347638 |
| 419 | ZINC000257347639 | 419 | ZINC000257347639 | 62 | 45357367 | 143 | 5284365 | 417 | ZINC000257347638 |
| 420 | ZINC000257347639 | 420 | ZINC000257347639 | 83 | 42612215 | 62 | 45357367 | 418 | ZINC000257347639 |
| 10 | 91820057 | 10 | 91820057 | 84 | 42612215 | 128 | 6713972 | 419 | ZINC000257347639 |
| 11 | 91820057 | 11 | 91820057 | 121 | 7067772 | 129 | 6713972 | 420 | ZINC000257347639 |
| 12 | 91820057 | 12 | 91820057 | 122 | 7067772 | 121 | 7067772 | 148 | 3052142 |
| 31 | 54758613 | 31 | 54758613 | 123 | 7067772 | 123 | 7067772 | 229 | 129618 |
| 32 | 54758613 | 32 | 54758613 | 127 | 6713972 | 43 | 51351654 | 231 | 129618 |
Recently, Palacio-Rodríguez et al. (2019) proposed a new ranking function for virtual screening in molecular docking. According to the paper, this ranking function can be extrapolated to other ranking problems, such as this QSAR prediction. The function is:
\[\begin{align} \mathbf{P(i)} = \mathbf{\frac{1}{\sigma}\sum_{j}^{} \exp\left(-\frac{r_i^j}{\sigma}\right)} \end{align}\]
Where \(r_i^j\) is the rank of the i-th molecule in the ranking given by the j-th model (QSAR). \(\sigma\) is the expected value of the exponential distribution. This parameter takes into account the number of molecules of each scoring function that will be considered (it can be seen as a threshold on the molecules that will be taken into account for the consensus). Finally, the sum gives the consensus ranking P(i) for the i-th molecule. Let’s apply this strategy:
# Exponential consensus ranking term for one or more rank positions.
#
# X:   numeric rank position(s); vectors and matrices are handled
#      element-wise and NA entries stay NA.
# Sig: positive scale of the exponential (the sigma of the ranking function).
#
# Returns exp(-X / Sig) / Sig with the same shape as X. Summing the terms of a
# molecule over all models gives its consensus score.
ECR <- function(X, Sig) {
  # A zero or negative scale would silently yield Inf/NaN or inverted scores.
  stopifnot(is.numeric(Sig), all(Sig > 0))
  exp(-X / Sig) / Sig
}
## Two variants
# Row position of every indexed structure in each model's column of
# Moleculas_2_Total (one column per model).
ECR_2 <- cbind(
  "LASSO" = match(indices_2$Index, Moleculas_2_Total[["LASSO"]]),
  "LASSO-MLR" = match(indices_2$Index, Moleculas_2_Total[["LASSO-MLR"]]),
  "GA-MLR" = match(indices_2$Index, Moleculas_2_Total[["GA-MLR"]]),
  "PLS" = match(indices_2$Index, Moleculas_2_Total[["PLS"]]),
  "RSM" = match(indices_2$Index, Moleculas_2_Total[["RSM"]])
)
# Per-model exponential terms; ECR() works element-wise, so the whole matrix
# is passed at once. Row names V1, V2, ... match what the row-wise version built.
ECR_Salida_2 <- ECR(ECR_2, Sig = 20)
rownames(ECR_Salida_2) <- paste0("V", seq_len(nrow(ECR_Salida_2)))
# Consensus score = sum of the terms of all models for each structure.
Ranking_Consenso_2 <- data.frame(ECR = apply(ECR_Salida_2, 1, sum))
row.names(Ranking_Consenso_2) <- as.vector(indices_2$Index)
Ranking_Consenso_2$ID <- as.vector(indices_2$Index)
# Highest consensus score first, then attach the labels.
Output_ECR_2 <- Ranking_Consenso_2[order(Ranking_Consenso_2$ECR, decreasing = TRUE), ]
colnames(Output_ECR_2) <- c("ECR", "Index")
Output_ECR_2 <- left_join(Output_ECR_2, indices_2, by = "Index")
kable(Output_ECR_2[seq_len(20), ], align = "ccc")
| ECR | Index | Label |
|---|---|---|
| 0.1239104 | 5 | 118707521 |
| 0.1177955 | 6 | 118707521 |
| 0.1096457 | 11 | 76325328 |
| 0.1063287 | 12 | 76325328 |
| 0.1039851 | 277 | ZINC000257347638 |
| 0.0998926 | 8 | 91820057 |
| 0.0989136 | 278 | ZINC000257347638 |
| 0.0988349 | 22 | 54758613 |
| 0.0987525 | 115 | 446101 |
| 0.0940895 | 279 | ZINC000257347639 |
| 0.0932883 | 7 | 91820057 |
| 0.0925655 | 116 | 446101 |
| 0.0909432 | 21 | 54758613 |
| 0.0895007 | 280 | ZINC000257347639 |
| 0.0889071 | 125 | 440271 |
| 0.0864304 | 215 | ZINC000019893747 |
| 0.0824722 | 126 | 440271 |
| 0.0822214 | 216 | ZINC000019893747 |
| 0.0804882 | 1 | 134695375 |
| 0.0784499 | 127 | 440235 |
## Three variants
# Row position of every indexed structure in each model's column of
# Moleculas_3_Total (one column per model).
ECR_3 <- cbind(match(indices_3$Index, Moleculas_3_Total$LASSO), match(indices_3$Index,
Moleculas_3_Total$`LASSO-MLR`), match(indices_3$Index, Moleculas_3_Total$`GA-MLR`),
match(indices_3$Index, Moleculas_3_Total$PLS), match(indices_3$Index, Moleculas_3_Total$RSM))
colnames(ECR_3) <- c("LASSO", "LASSO-MLR", "GA-MLR", "PLS", "RSM")
# Per-model exponential terms, one row per structure after the transpose.
ECR_Salida_3 <- t(as.data.frame(apply(ECR_3, 1, ECR, Sig = 20)))
# Consensus score = sum of the terms of all models for each structure.
# (This table was previously built twice with identical statements; the
# duplicate has been removed.)
Ranking_Consenso_3 <- as.data.frame(apply(ECR_Salida_3, 1, sum))
colnames(Ranking_Consenso_3) <- c("ECR")
row.names(Ranking_Consenso_3) <- as.vector(indices_3$Index)
Ranking_Consenso_3$ID <- as.vector(indices_3$Index)
# Highest consensus score first, then attach the labels.
Output_ECR_3 <- Ranking_Consenso_3[order(Ranking_Consenso_3$ECR, decreasing = TRUE), ]
colnames(Output_ECR_3) <- c("ECR", "Index")
Output_ECR_3 <- left_join(Output_ECR_3, indices_3, by = "Index")
kable(Output_ECR_3[1:20, ], align = "ccc")
| ECR | Index | Label |
|---|---|---|
| 0.1323101 | 172 | 446101 |
| 0.1108496 | 7 | 118707521 |
| 0.1054485 | 8 | 118707521 |
| 0.1003772 | 9 | 118707521 |
| 0.0945118 | 173 | 446101 |
| 0.0935430 | 16 | 76325328 |
| 0.0928678 | 10 | 91820057 |
| 0.0910086 | 322 | ZINC000019893747 |
| 0.0891044 | 17 | 76325328 |
| 0.0880936 | 415 | ZINC000257347638 |
| 0.0867441 | 174 | 446101 |
| 0.0846394 | 18 | 76325328 |
| 0.0837973 | 416 | ZINC000257347638 |
| 0.0799349 | 12 | 91820057 |
| 0.0797105 | 417 | ZINC000257347638 |
| 0.0779056 | 323 | ZINC000019893747 |
| 0.0758229 | 418 | ZINC000257347639 |
| 0.0744185 | 412 | ZINC000257346903 |
| 0.0741063 | 324 | ZINC000019893747 |
| 0.0740046 | 187 | 440271 |
# Per model, keep only the first row of every distinct label and pair it with
# its index divided by 2 and rounded up (two consecutive indices then collapse
# to one number). The ten resulting vectors are bound column-wise in model order.
Moleculas_2_curado <- do.call(cbind, unlist(lapply(
  c("LASSO", "LASSO-MLR", "GA-MLR", "PLS", "RSM"),
  function(modelo) {
    etiquetas <- Moleculas_2_Total[[paste0(modelo, ".1")]]
    primera_vez <- !duplicated(etiquetas)
    list(
      ceiling(as.numeric(Moleculas_2_Total[[modelo]][primera_vez]) / 2),
      etiquetas[primera_vez]
    )
  }
), recursive = FALSE))
colnames(Moleculas_2_curado) <- c(
  "Lasso_Index", "Lasso_Molecule",
  "Lasso-MLR_Index", "Lasso-MLR_Molecule",
  "GA-MLR_Index", "GA-MLR_Molecule",
  "PLS_Index", "PLS_Molecule",
  "RSM_Index", "RSM_Molecule"
)
kable(Moleculas_2_curado[seq_len(20), ], align = "cccccccccc")
| Lasso_Index | Lasso_Molecule | Lasso-MLR_Index | Lasso-MLR_Molecule | GA-MLR_Index | GA-MLR_Molecule | PLS_Index | PLS_Molecule | RSM_Index | RSM_Molecule |
|---|---|---|---|---|---|---|---|---|---|
| 58 | 446101 | 58 | 446101 | 3 | 118707521 | 11 | 54758613 | 3 | 118707521 |
| 108 | ZINC000019893747 | 108 | ZINC000019893747 | 6 | 76325328 | 63 | 440271 | 6 | 76325328 |
| 138 | ZINC000257346903 | 138 | ZINC000257346903 | 123 | ZINC000101361998 | 4 | 91820057 | 74 | 198515 |
| 139 | ZINC000257347638 | 139 | ZINC000257347638 | 36 | 12600646 | 64 | 440235 | 75 | 198514 |
| 140 | ZINC000257347639 | 140 | ZINC000257347639 | 48 | 5284365 | 96 | 739 | 139 | ZINC000257347638 |
| 4 | 91820057 | 4 | 91820057 | 18 | 46174147 | 36 | 12600646 | 140 | ZINC000257347639 |
| 11 | 54758613 | 11 | 54758613 | 1 | 134695375 | 48 | 5284365 | 50 | 3052142 |
| 15 | 51351654 | 15 | 51351654 | 21 | 45357367 | 23 | 45266741 | 110 | ZINC000032106887 |
| 63 | 440271 | 63 | 440271 | 28 | 42612215 | 28 | 42612215 | 111 | ZINC000032106889 |
| 64 | 440235 | 64 | 440235 | 41 | 7067772 | 43 | 6713972 | 77 | 129618 |
| 1 | 134695375 | 1 | 134695375 | 43 | 6713972 | 41 | 7067772 | 9 | 57339290 |
| 2 | 129878031 | 2 | 129878031 | 81 | 112106 | 15 | 51351654 | 142 | ZINC000307565252 |
| 3 | 118707521 | 3 | 118707521 | 104 | ZINC000015205840 | 115 | ZINC000082114392 | 137 | ZINC000257346492 |
| 5 | 90659182 | 5 | 90659182 | 105 | ZINC000015205946 | 18 | 46174147 | 71 | 312829 |
| 6 | 76325328 | 6 | 76325328 | 132 | ZINC000106384102 | 1 | 134695375 | 78 | 127132 |
| 7 | 57376616 | 7 | 57376616 | 32 | 24779679 | 21 | 45357367 | 83 | 99461 |
| 8 | 57339292 | 8 | 57339292 | 25 | 44341508 | 104 | ZINC000015205840 | 118 | ZINC000100815672 |
| 9 | 57339290 | 9 | 57339290 | 82 | 102416 | 9 | 57339290 | 17 | 49802606 |
| 10 | 54758653 | 10 | 54758653 | 85 | 90220 | 81 | 112106 | 130 | ZINC000106382957 |
| 12 | 52948856 | 12 | 52948856 | 87 | 82398 | 142 | ZINC000307565252 | 131 | ZINC000106382965 |
# Per model, keep only the first row of every distinct label and pair it with
# its index divided by 3 and rounded up (three consecutive indices then collapse
# to one number). The ten resulting vectors are bound column-wise in model order.
Moleculas_3_curado <- do.call(cbind, unlist(lapply(
  c("LASSO", "LASSO-MLR", "GA-MLR", "PLS", "RSM"),
  function(modelo) {
    etiquetas <- Moleculas_3_Total[[paste0(modelo, ".1")]]
    primera_vez <- !duplicated(etiquetas)
    list(
      ceiling(as.numeric(Moleculas_3_Total[[modelo]][primera_vez]) / 3),
      etiquetas[primera_vez]
    )
  }
), recursive = FALSE))
colnames(Moleculas_3_curado) <- c(
  "Lasso_Index", "Lasso_Molecule",
  "Lasso-MLR_Index", "Lasso-MLR_Molecule",
  "GA-MLR_Index", "GA-MLR_Molecule",
  "PLS_Index", "PLS_Molecule",
  "RSM_Index", "RSM_Molecule"
)
kable(Moleculas_3_curado[seq_len(20), ], align = "cccccccccc")
| Lasso_Index | Lasso_Molecule | Lasso-MLR_Index | Lasso-MLR_Molecule | GA-MLR_Index | GA-MLR_Molecule | PLS_Index | PLS_Molecule | RSM_Index | RSM_Molecule |
|---|---|---|---|---|---|---|---|---|---|
| 58 | 446101 | 58 | 446101 | 3 | 118707521 | 4 | 91820057 | 3 | 118707521 |
| 108 | ZINC000019893747 | 108 | ZINC000019893747 | 6 | 76325328 | 63 | 440271 | 6 | 76325328 |
| 138 | ZINC000257346903 | 138 | ZINC000257346903 | 123 | ZINC000101361998 | 64 | 440235 | 74 | 198515 |
| 139 | ZINC000257347638 | 139 | ZINC000257347638 | 48 | 5284365 | 58 | 446101 | 75 | 198514 |
| 140 | ZINC000257347639 | 140 | ZINC000257347639 | 18 | 46174147 | 121 | ZINC000101136391 | 139 | ZINC000257347638 |
| 4 | 91820057 | 4 | 91820057 | 1 | 134695375 | 20 | 45933887 | 140 | ZINC000257347639 |
| 11 | 54758613 | 11 | 54758613 | 21 | 45357367 | 48 | 5284365 | 50 | 3052142 |
| 15 | 51351654 | 15 | 51351654 | 28 | 42612215 | 23 | 45266741 | 77 | 129618 |
| 63 | 440271 | 63 | 440271 | 41 | 7067772 | 142 | ZINC000307565252 | 110 | ZINC000032106887 |
| 64 | 440235 | 64 | 440235 | 43 | 6713972 | 21 | 45357367 | 111 | ZINC000032106889 |
| 1 | 134695375 | 1 | 134695375 | 44 | 6420074 | 43 | 6713972 | 9 | 57339290 |
| 2 | 129878031 | 2 | 129878031 | 53 | 2733335 | 41 | 7067772 | 108 | ZINC000019893747 |
| 3 | 118707521 | 3 | 118707521 | 81 | 112106 | 15 | 51351654 | 142 | ZINC000307565252 |
| 5 | 90659182 | 5 | 90659182 | 104 | ZINC000015205840 | 44 | 6420074 | 137 | ZINC000257346492 |
| 6 | 76325328 | 6 | 76325328 | 105 | ZINC000015205946 | 18 | 46174147 | 71 | 312829 |
| 7 | 57376616 | 7 | 57376616 | 132 | ZINC000106384102 | 53 | 2733335 | 78 | 127132 |
| 8 | 57339292 | 8 | 57339292 | 32 | 24779679 | 1 | 134695375 | 83 | 99461 |
| 9 | 57339290 | 9 | 57339290 | 25 | 44341508 | 104 | ZINC000015205840 | 118 | ZINC000100815672 |
| 10 | 54758653 | 10 | 54758653 | 82 | 102416 | 9 | 57339290 | 138 | ZINC000257346903 |
| 12 | 52948856 | 12 | 52948856 | 85 | 90220 | 126 | ZINC000102974493 | 13 | 52944904 |
# Keep the first (highest-scoring) row of every distinct label and replace the
# index by index / 2 rounded up. cbind() mixes numbers with labels, so the
# result is a character matrix, as before.
Output_ECR_2_curado <- with(
  Output_ECR_2[!duplicated(Output_ECR_2$Label), ],
  cbind(ECR = ECR, Index = ceiling(as.numeric(Index) / 2), Molecule = Label)
)
kable(Output_ECR_2_curado[seq_len(20), ], align = "ccc")
| ECR | Index | Molecule |
|---|---|---|
| 0.123910394377029 | 3 | 118707521 |
| 0.10964571955221 | 6 | 76325328 |
| 0.103985062328752 | 139 | ZINC000257347638 |
| 0.0998926021126605 | 4 | 91820057 |
| 0.0988349376415874 | 11 | 54758613 |
| 0.0987524816241701 | 58 | 446101 |
| 0.0940895291726436 | 140 | ZINC000257347639 |
| 0.0889070691073631 | 63 | 440271 |
| 0.0864304316353736 | 108 | ZINC000019893747 |
| 0.0804882060518684 | 1 | 134695375 |
| 0.0784499443559222 | 64 | 440235 |
| 0.078082494327749 | 138 | ZINC000257346903 |
| 0.0686878753936395 | 15 | 51351654 |
| 0.0670244674612893 | 36 | 12600646 |
| 0.0628736414964675 | 48 | 5284365 |
| 0.0573643004621222 | 18 | 46174147 |
| 0.0542501126233325 | 28 | 42612215 |
| 0.0493435166015162 | 9 | 57339290 |
| 0.0480181977442779 | 21 | 45357367 |
| 0.0469106652810004 | 41 | 7067772 |
# Keep the first (highest-scoring) row of every distinct label and replace the
# index by index / 3 rounded up. cbind() mixes numbers with labels, so the
# result is a character matrix, as before.
Output_ECR_3_curado <- with(
  Output_ECR_3[!duplicated(Output_ECR_3$Label), ],
  cbind(ECR = ECR, Index = ceiling(as.numeric(Index) / 3), Molecule = Label)
)
kable(Output_ECR_3_curado[seq_len(20), ], align = "ccc")
| ECR | Index | Molecule |
|---|---|---|
| 0.132310100606202 | 58 | 446101 |
| 0.110849573814621 | 3 | 118707521 |
| 0.0935430282121767 | 6 | 76325328 |
| 0.0928677690607199 | 4 | 91820057 |
| 0.0910086285115274 | 108 | ZINC000019893747 |
| 0.0880936480494446 | 139 | ZINC000257347638 |
| 0.0758229257026819 | 140 | ZINC000257347639 |
| 0.0744184874508252 | 138 | ZINC000257346903 |
| 0.0740045912479688 | 63 | 440271 |
| 0.0656928404795316 | 64 | 440235 |
| 0.0596569652183867 | 1 | 134695375 |
| 0.0592038919518806 | 48 | 5284365 |
| 0.0516837253833607 | 15 | 51351654 |
| 0.0500240637735259 | 21 | 45357367 |
| 0.0450734356889405 | 11 | 54758613 |
| 0.0448522533185709 | 18 | 46174147 |
| 0.0417817679463113 | 41 | 7067772 |
| 0.0400215026402701 | 43 | 6713972 |
| 0.0352496273855809 | 123 | ZINC000101361998 |
| 0.0352353230855984 | 74 | 198515 |
# Consensus ranking of the two-variant set using four models only
# (LASSO-MLR, GA-MLR, PLS and RSM; the plain LASSO column is left out).
ECR_2_final <- cbind(
  "LASSO-MLR" = match(indices_2$Index, Moleculas_2_Total[["LASSO-MLR"]]),
  "GA-MLR" = match(indices_2$Index, Moleculas_2_Total[["GA-MLR"]]),
  "PLS" = match(indices_2$Index, Moleculas_2_Total[["PLS"]]),
  "RSM" = match(indices_2$Index, Moleculas_2_Total[["RSM"]])
)
# Per-model exponential terms; ECR() works element-wise, so the whole matrix
# is passed at once. Row names V1, V2, ... match what the row-wise version built.
ECR_Salida_2_final <- ECR(ECR_2_final, Sig = 20)
rownames(ECR_Salida_2_final) <- paste0("V", seq_len(nrow(ECR_Salida_2_final)))
# Consensus score = sum of the terms of the four models for each structure.
Ranking_Consenso_2_final <- data.frame(ECR = apply(ECR_Salida_2_final, 1, sum))
row.names(Ranking_Consenso_2_final) <- as.vector(indices_2$Index)
Ranking_Consenso_2_final$ID <- as.vector(indices_2$Index)
# Highest consensus score first, then attach the labels.
Output_ECR_2_final <- Ranking_Consenso_2_final[
  order(Ranking_Consenso_2_final$ECR, decreasing = TRUE),
]
colnames(Output_ECR_2_final) <- c("ECR", "Index")
Output_ECR_2_final <- left_join(Output_ECR_2_final, indices_2, by = "Index")
# Keep the best row per label and replace the index by index / 2 rounded up;
# the result is a character matrix because numbers and labels are bound together.
Output_ECR_2_final <- with(
  Output_ECR_2_final[!duplicated(Output_ECR_2_final$Label), ],
  cbind(ECR = ECR, Index = ceiling(as.numeric(Index) / 2), Molecule = Label)
)
kable(Output_ECR_2_final[seq_len(20), ], align = "ccc")
| ECR | Index | Molecule |
|---|---|---|
| 0.109585154534019 | 3 | 118707521 |
| 0.0979172051475202 | 6 | 76325328 |
| 0.074005672452017 | 11 | 54758613 |
| 0.0724520203079591 | 4 | 91820057 |
| 0.0687506578428166 | 139 | ZINC000257347638 |
| 0.0675363225099268 | 63 | 440271 |
| 0.0663460395106793 | 36 | 12600646 |
| 0.0629913185963106 | 1 | 134695375 |
| 0.0626693029245443 | 48 | 5284365 |
| 0.0622081215915549 | 140 | ZINC000257347639 |
| 0.0591128931831972 | 64 | 440235 |
| 0.0530496211371537 | 18 | 46174147 |
| 0.0526628308044291 | 28 | 42612215 |
| 0.0511910103991344 | 58 | 446101 |
| 0.0464991779285494 | 41 | 7067772 |
| 0.0453286089440921 | 43 | 6713972 |
| 0.0450695477565888 | 15 | 51351654 |
| 0.044977694613017 | 21 | 45357367 |
| 0.0433950328141207 | 108 | ZINC000019893747 |
| 0.0406548194289939 | 9 | 57339290 |
# Consensus ranking of the three-variant set using four models only
# (LASSO-MLR, GA-MLR, PLS and RSM; the plain LASSO column is left out).
ECR_3_final <- cbind(
  "LASSO-MLR" = match(indices_3$Index, Moleculas_3_Total[["LASSO-MLR"]]),
  "GA-MLR" = match(indices_3$Index, Moleculas_3_Total[["GA-MLR"]]),
  "PLS" = match(indices_3$Index, Moleculas_3_Total[["PLS"]]),
  "RSM" = match(indices_3$Index, Moleculas_3_Total[["RSM"]])
)
# Per-model exponential terms; ECR() works element-wise, so the whole matrix
# is passed at once. Row names V1, V2, ... match what the row-wise version built.
ECR_Salida_3_final <- ECR(ECR_3_final, Sig = 20)
rownames(ECR_Salida_3_final) <- paste0("V", seq_len(nrow(ECR_Salida_3_final)))
# Consensus score = sum of the terms of the four models for each structure.
Ranking_Consenso_3_final <- data.frame(ECR = apply(ECR_Salida_3_final, 1, sum))
row.names(Ranking_Consenso_3_final) <- as.vector(indices_3$Index)
Ranking_Consenso_3_final$ID <- as.vector(indices_3$Index)
# Highest consensus score first, then attach the labels.
Output_ECR_3_final <- Ranking_Consenso_3_final[
  order(Ranking_Consenso_3_final$ECR, decreasing = TRUE),
]
colnames(Output_ECR_3_final) <- c("ECR", "Index")
Output_ECR_3_final <- left_join(Output_ECR_3_final, indices_3, by = "Index")
# Keep the best row per label and replace the index by index / 3 rounded up;
# the result is a character matrix because numbers and labels are bound together.
Output_ECR_3_final <- with(
  Output_ECR_3_final[!duplicated(Output_ECR_3_final$Label), ],
  cbind(ECR = ECR, Index = ceiling(as.numeric(Index) / 3), Molecule = Label)
)
kable(Output_ECR_3_final[seq_len(20), ], align = "ccc")
| ECR | Index | Molecule |
|---|---|---|
| 0.102987715498939 | 3 | 118707521 |
| 0.0877188203235018 | 6 | 76325328 |
| 0.0847486293811661 | 58 | 446101 |
| 0.0704013208548588 | 4 | 91820057 |
| 0.0596793514049593 | 63 | 440271 |
| 0.0591908290240155 | 48 | 5284365 |
| 0.0577671150638129 | 139 | ZINC000257347638 |
| 0.054279319942693 | 64 | 440235 |
| 0.0500720908576283 | 108 | ZINC000019893747 |
| 0.0497206368646311 | 140 | ZINC000257347639 |
| 0.0492358429492832 | 21 | 45357367 |
| 0.0490445665270496 | 1 | 134695375 |
| 0.0436763660257705 | 18 | 46174147 |
| 0.0417405127001481 | 41 | 7067772 |
| 0.0399924305596604 | 43 | 6713972 |
| 0.0391840829648896 | 138 | ZINC000257346903 |
| 0.0352496270434173 | 123 | ZINC000101361998 |
| 0.0352348647812116 | 74 | 198515 |
| 0.0350401711984567 | 15 | 51351654 |
| 0.031900896754871 | 121 | ZINC000101136391 |
Molecule 115
Molecule 215
Molecule 275
Molecule 5
Molecule 11
Molecule 246
Molecule 22
Molecule 125
Molecule 8
Molecule 5
Molecule 11
Molecule 147
Molecule 172
Molecule 322
Molecule 412
Molecule 7
Molecule 16
Molecule 367
Molecule 10
Molecule 187
Molecule 192
Molecule 7
Molecule 16
Molecule 220
Molecule 5
Molecule 11
Molecule 22
Molecule 7
Molecule 16
Molecule 123
One of the most important features of these QSAR models is their physicochemical interpretation, since the correlation between these characteristics and the biological activity of the ligands can be used to propose new ligands and to gain more insight into the mechanism of action.
So, the common physicochemical features are the presence of C-S atoms at a certain topological distance, and at least one hydrogen bond donor and one hydrogen bond acceptor with a lipophilic group and at least one 6-membered ring. Please refer to the manuscript to get more information about